

### -------------------------------------------------- #
### ---- Load & Clean Datasets: ----
### -------------------------------------------------- #

### -------------------------------------------------- #
### ** Read VF from CSV ----
### -------------------------------------------------- #

# Read the state voter file (VF) from CSV, keeping only `cols_to_keep`.
# A single "character" class is recycled by fread() to every column, so the
# read does not depend on the file having an exact, hard-coded column count.
# The result is converted to a tibble for the dplyr steps that follow.
vf_state <- data.table::fread(
  input = paste0(vf_file_path, ABB, ".csv"),
  verbose = FALSE,
  colClasses = "character",
  select = cols_to_keep
) %>%
  dplyr::as_tibble()


### -------------------------------------------------- #
### ** Read TFA from CSV ----
### -------------------------------------------------- #

# Restore the state-specific TFA subset from its .RData file (per the original
# note, the saved object is `tfa_dat_flag`, produced by file #108).
# NOTE(review): the fastLink call further down reads `tfa_dat_match_state` --
# confirm which object names this .RData file actually provides.
load(
  file = paste0(
    file_path_data_matched,
    "/State_TFA_Subsets/tfa_to_state_",
    ABB,
    "_flags_v2.RData"
  )
)


### -------------------------------------------------- #
### ** Standardize VF Variables for Matching ----
### -------------------------------------------------- #

# Standardize the VF match variables in a single pass:
#   1. trim leading/trailing whitespace,
#   2. convert strings that are empty after trimming to NA.
# A plain function is passed to mutate_at() instead of the deprecated funs()
# helper; the result is the same as trimming and NA-converting in two passes.
# Finally, Sex values of "U" are recoded to NA (presumably "unknown" -- confirm
# against the voter-file codebook).
vf_state <- vf_state %>%
  mutate_at(all_match_vars, function(x) na_if(trimws(x), "")) %>%
  mutate(Sex = na_if(Sex, "U"))


### -------------------------------------------------- #
### ** Drop match vars if empty ----
### -------------------------------------------------- #

# Flag every match variable that has at least one non-missing value in the VF.
# vapply() always returns a named logical vector (even when `all_match_vars`
# is empty, where sapply() would return a list), and `[[` extracts the column
# as a plain vector rather than a one-column tibble.
keep_match_vars <- vapply(
  all_match_vars,
  function(x) !all(is.na(vf_state[[x]])),
  logical(1)
)

# Report when at least one match variable is entirely missing in this file.
if (!all(keep_match_vars)) {
  print("Note: Some match variables contain no values in this voter file. The following variables can be used in the match.")
  print(keep_match_vars)
}

# Match variables that actually contain data
vnames <- all_match_vars[keep_match_vars]

# Exclude the zip-code variables from this match (the original note ties them
# to matches 2-5).
addressVars <- c("MailingAddressZip5", "RegistrationAddressZip5")
vnames <- vnames[!vnames %in% addressVars]


### -------------------------------------------------- #
### ---- Run fastLink() ----
### -------------------------------------------------- #

### -------------------------------------------------- #
### ** (Match 1) Birthday Match ----
### -------------------------------------------------- #

### Run fastLink() and save output [run and save parts separately].
### `fastLink_JBC` is defined elsewhere in the project; it is presumably a
### wrapper around fastLink::fastLink() that also writes results to disk
### (confirm against its definition).
fastLink_JBC(
  # Datasets to link and the variables to link them on
  dfA = tfa_dat_match_state,
  dfB = vf_state,
  varnames = vnames,
  # Matching configuration: 0.85 match threshold, de-duplicated matches,
  # string-distance and partial matching on LastName only
  threshold.match = 0.85,
  verbose = TRUE,
  dedupe.matches = TRUE,
  stringdist.match = c("LastName"),
  partial.match = c("LastName"),
  # State abbreviation, run label, output root, and number of cores
  ABB = ABB,
  attempt_name = "bday_parts_v2",
  file_path_root = file_path,
  n.core = 5
)

### -------------------------------------------------- #
### ---- Clear Memory ----
### -------------------------------------------------- #

### Clear the workspace, keeping only functions and the objects listed below.
### NOTE(review): `file_path_data_matched` and `ABB` are read earlier in this
### script but are not on the keep list -- confirm the caller re-creates them.
keep_objects <- c(
  lsf.str(),                             # functions
  "file_path", "vf_file_path", "fp110",  # file paths
  "all_match_vars", "cols_to_keep"
)

# `keep_objects` is not on its own list, so it is removed here as well.
rm(list = setdiff(ls(), keep_objects))

### Memory management
gc()